Audio corruptions examples¶
Imports & Definition of necessary functions
In [2]:
from examples_helper import *
Load a neutral utterance and generate its corrupted versions¶
Load the original neutral utterance
In [40]:
# Path of the reference (uncorrupted) utterance and the emotion label handed to the helper calls.
iemocap_audio = "../../iemocap/neutral.wav"
label = "neutral"
# sr=None makes librosa keep the file's native sampling rate instead of resampling.
audio_data, sample_rate = librosa.load(iemocap_audio, sr=None)
# get_spectrogram receives the file path (not the decoded array); presumably it renders the plot.
get_spectrogram(iemocap_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(iemocap_audio)
Out[40]:
Add Gaussian noise (10 dB SNR)¶
In [4]:
# Apply the gaussian_corruption helper to the decoded neutral waveform. No noise level is passed
# here, so it is presumably fixed inside the helper — TODO confirm.
neutral_gaussian = gaussian_corruption(label, audio_data, sample_rate)
# The result is passed to the same calls that accepted the original file path, so it is
# presumably a path to the corrupted audio — TODO confirm.
get_spectrogram(neutral_gaussian, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(neutral_gaussian)
Out[4]:
Add clipping distortion (40%)¶
In [5]:
# Apply the clipping_corruption helper to the decoded neutral waveform. The clipping amount is
# not passed here, so it is presumably fixed inside the helper — TODO confirm.
neutral_clipping = clipping_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(neutral_clipping, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(neutral_clipping)
Out[5]:
Compress audio (8kbps bit rate)¶
In [6]:
# Unlike the other corruption helpers, compress_audio is given the source file path rather than
# the decoded waveform. The bit rate is not passed here — presumably set inside the helper.
compressed_file = compress_audio(iemocap_audio, label, sample_rate)
# compressed_file is used like a file path below — presumably the compressed output; TODO confirm.
get_spectrogram(compressed_file, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(compressed_file)
Out[6]:
Add gain transition (in [-30, -10] dB range)¶
In [7]:
# Apply the gain_corruption helper to the decoded neutral waveform. The gain range is not passed
# here, so it is presumably fixed inside the helper — TODO confirm.
neutral_gain = gain_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(neutral_gain, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(neutral_gain)
Out[7]:
Add reverberation (rt60 in 0.1 to 0.5 seconds)¶
In [8]:
# Apply the reverberation helper to the decoded neutral waveform. The message printed by this
# cell reports that impulse responses are selected from an EchoThief directory by RT60 range;
# neither the directory nor the range is passed here — presumably configured in the helper.
reverb_audio = reverberation(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(reverb_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(reverb_audio)
Selected 1 impulse responses from ../../datasets/EchoThiefImpulseResponseLibrary/Underground with RT60 in range [0.1, 0.5]
Out[8]:
Add background noise from ESC50 dataset (0dB)¶
In [9]:
# esc_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the ESC50 noise source and mixing level — TODO confirm.
# NOTE(review): the cell prints which corruption file was used and that file is not the same in
# every section, so the choice looks random; no seed is set in this notebook.
esc_augmented = background_noise(label, audio_data, sample_rate, esc_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(esc_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(esc_augmented)
Corruption file: 2-117615-D-48.wav
Out[9]:
Add background noise from MUSAN (10dB)¶
In [42]:
# musan_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the MUSAN noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
musan_augmented = background_noise(label, audio_data, sample_rate, musan_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(musan_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(musan_augmented)
Corruption file: noise-free-sound-0755.wav
Out[42]:
Add background noise from urbansound8k (20dB)¶
In [11]:
# urban_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the UrbanSound8K noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
urban_augmented = background_noise(label, audio_data, sample_rate, urban_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(urban_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(urban_augmented)
Corruption file: 195969-0-0-24.wav
Out[11]:
Load a happy utterance and generate its corrupted versions¶
Load the original happy utterance
In [12]:
# Rebind the shared globals (iemocap_audio, label, audio_data, sample_rate) to the happy
# utterance; every cell that follows reads these names, so this cell must run first.
iemocap_audio = "../../iemocap/happy.wav"
label = "happy"
# sr=None makes librosa keep the file's native sampling rate instead of resampling.
audio_data, sample_rate = librosa.load(iemocap_audio, sr=None)
# get_spectrogram receives the file path (not the decoded array); presumably it renders the plot.
get_spectrogram(iemocap_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(iemocap_audio)
Out[12]:
Add Gaussian noise (10 dB SNR)¶
In [13]:
# Apply the gaussian_corruption helper to the decoded happy waveform. No noise level is passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
happy_gaussian = gaussian_corruption(label, audio_data, sample_rate)
# The result is passed to the same calls that accepted the original file path, so it is
# presumably a path to the corrupted audio — TODO confirm.
get_spectrogram(happy_gaussian, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(happy_gaussian)
Out[13]:
Add clipping distortion (40%)¶
In [14]:
# Apply the clipping_corruption helper to the decoded happy waveform. The clipping amount is
# not passed here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
happy_clipping = clipping_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(happy_clipping, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(happy_clipping)
Out[14]:
Compress audio (8kbps bit rate)¶
In [15]:
# Unlike the other corruption helpers, compress_audio is given the source file path rather than
# the decoded waveform. The bit rate is not passed here — presumably set inside the helper.
compressed_file = compress_audio(iemocap_audio, label, sample_rate)
# compressed_file is used like a file path below — presumably the compressed output; TODO confirm.
get_spectrogram(compressed_file, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(compressed_file)
Out[15]:
Add gain transition (in [-30, -10] dB range)¶
In [16]:
# Apply the gain_corruption helper to the decoded happy waveform. The gain range is not passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
happy_gain = gain_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(happy_gain, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(happy_gain)
Out[16]:
Add reverberation (rt60 in 0.1 to 0.5 seconds)¶
In [17]:
# Apply the reverberation helper to the decoded happy waveform. The message printed by this
# cell reports that impulse responses are selected from an EchoThief directory by RT60 range;
# neither the directory nor the range is passed here — presumably configured in the helper.
reverb_audio = reverberation(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(reverb_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(reverb_audio)
Selected 1 impulse responses from ../../datasets/EchoThiefImpulseResponseLibrary/Underground with RT60 in range [0.1, 0.5]
Out[17]:
Add background noise from ESC50 dataset (0dB)¶
In [18]:
# esc_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the ESC50 noise source and mixing level — TODO confirm.
# NOTE(review): the cell prints which corruption file was used and that file is not the same in
# every section, so the choice looks random; no seed is set in this notebook.
esc_augmented = background_noise(label, audio_data, sample_rate, esc_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(esc_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(esc_augmented)
Corruption file: 2-117615-D-48.wav
Out[18]:
Add background noise from MUSAN (10dB)¶
In [19]:
# musan_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the MUSAN noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
musan_augmented = background_noise(label, audio_data, sample_rate, musan_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(musan_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(musan_augmented)
Corruption file: music-jamendo-0064.wav
Out[19]:
Add background noise from urbansound8k (20dB)¶
In [20]:
# urban_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the UrbanSound8K noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
urban_augmented = background_noise(label, audio_data, sample_rate, urban_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(urban_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(urban_augmented)
Corruption file: 70098-3-1-0.wav
Out[20]:
Load a sad utterance and generate its corrupted versions¶
Load the original sad utterance
In [21]:
# Rebind the shared globals (iemocap_audio, label, audio_data, sample_rate) to the sad
# utterance; every cell that follows reads these names, so this cell must run first.
iemocap_audio = "../../iemocap/sad.wav"
label = "sad"
# sr=None makes librosa keep the file's native sampling rate instead of resampling.
audio_data, sample_rate = librosa.load(iemocap_audio, sr=None)
# get_spectrogram receives the file path (not the decoded array); presumably it renders the plot.
get_spectrogram(iemocap_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(iemocap_audio)
Out[21]:
Add Gaussian noise (10 dB SNR)¶
In [22]:
# Apply the gaussian_corruption helper to the decoded sad waveform. No noise level is passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
sad_gaussian = gaussian_corruption(label, audio_data, sample_rate)
# The result is passed to the same calls that accepted the original file path, so it is
# presumably a path to the corrupted audio — TODO confirm.
get_spectrogram(sad_gaussian, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(sad_gaussian)
Out[22]:
Add clipping distortion (40%)¶
In [23]:
# Apply the clipping_corruption helper to the decoded sad waveform. The clipping amount is
# not passed here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
sad_clipping = clipping_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(sad_clipping, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(sad_clipping)
Out[23]:
Compress audio (8kbps bit rate)¶
In [24]:
# Unlike the other corruption helpers, compress_audio is given the source file path rather than
# the decoded waveform. The bit rate is not passed here — presumably set inside the helper.
compressed_file = compress_audio(iemocap_audio, label, sample_rate)
# compressed_file is used like a file path below — presumably the compressed output; TODO confirm.
get_spectrogram(compressed_file, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(compressed_file)
Out[24]:
Add gain transition (in [-30, -10] dB range)¶
In [25]:
# Apply the gain_corruption helper to the decoded sad waveform. The gain range is not passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
sad_gain = gain_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(sad_gain, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(sad_gain)
Out[25]:
Add reverberation (rt60 in 0.1 to 0.5 seconds)¶
In [26]:
# Apply the reverberation helper to the decoded sad waveform. The message printed by this
# cell reports that impulse responses are selected from an EchoThief directory by RT60 range;
# neither the directory nor the range is passed here — presumably configured in the helper.
reverb_audio = reverberation(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(reverb_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(reverb_audio)
Selected 1 impulse responses from ../../datasets/EchoThiefImpulseResponseLibrary/Underground with RT60 in range [0.1, 0.5]
Out[26]:
Add background noise from ESC50 dataset (0dB)¶
In [27]:
# esc_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the ESC50 noise source and mixing level — TODO confirm.
# NOTE(review): the cell prints which corruption file was used and that file is not the same in
# every section, so the choice looks random; no seed is set in this notebook.
esc_augmented = background_noise(label, audio_data, sample_rate, esc_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(esc_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(esc_augmented)
Corruption file: 2-117615-D-48.wav
Out[27]:
Add background noise from MUSAN (10dB)¶
In [28]:
# musan_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the MUSAN noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
musan_augmented = background_noise(label, audio_data, sample_rate, musan_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(musan_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(musan_augmented)
Corruption file: noise-free-sound-0144.wav
Out[28]:
Add background noise from urbansound8k (20dB)¶
In [29]:
# urban_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the UrbanSound8K noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
urban_augmented = background_noise(label, audio_data, sample_rate, urban_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(urban_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(urban_augmented)
Corruption file: 91396-8-0-2.wav
Out[29]:
Load an angry utterance and generate its corrupted versions¶
Load the original angry utterance
In [3]:
# Rebind the shared globals (iemocap_audio, label, audio_data, sample_rate) to the angry
# utterance; every cell that follows reads these names, so this cell must run first.
iemocap_audio = "../../iemocap/angry.wav"
label = "angry"
# sr=None makes librosa keep the file's native sampling rate instead of resampling.
audio_data, sample_rate = librosa.load(iemocap_audio, sr=None)
# get_spectrogram receives the file path (not the decoded array); presumably it renders the plot.
get_spectrogram(iemocap_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(iemocap_audio)
Out[3]:
Add Gaussian noise (10 dB SNR)¶
In [31]:
# Apply the gaussian_corruption helper to the decoded angry waveform. No noise level is passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
angry_gaussian = gaussian_corruption(label, audio_data, sample_rate)
# The result is passed to the same calls that accepted the original file path, so it is
# presumably a path to the corrupted audio — TODO confirm.
get_spectrogram(angry_gaussian, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(angry_gaussian)
Out[31]:
Add clipping distortion (40%)¶
In [32]:
# Apply the clipping_corruption helper to the decoded angry waveform. The clipping amount is
# not passed here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
angry_clipping = clipping_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(angry_clipping, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(angry_clipping)
Out[32]:
Compress audio (8kbps bit rate)¶
In [33]:
# Unlike the other corruption helpers, compress_audio is given the source file path rather than
# the decoded waveform. The bit rate is not passed here — presumably set inside the helper.
compressed_file = compress_audio(iemocap_audio, label, sample_rate)
# compressed_file is used like a file path below — presumably the compressed output; TODO confirm.
get_spectrogram(compressed_file, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(compressed_file)
Out[33]:
Add gain transition (in [-30, -10] dB range)¶
In [34]:
# Apply the gain_corruption helper to the decoded angry waveform. The gain range is not passed
# here, so it is presumably fixed inside the helper — TODO confirm.
# The variable is named after the current utterance (it was a stale "neutral_" copy-paste name).
angry_gain = gain_corruption(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(angry_gain, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(angry_gain)
Out[34]:
Add reverberation (rt60 in 0.1 to 0.5 seconds)¶
In [35]:
# Apply the reverberation helper to the decoded angry waveform. The message printed by this
# cell reports that impulse responses are selected from an EchoThief directory by RT60 range;
# neither the directory nor the range is passed here — presumably configured in the helper.
reverb_audio = reverberation(label, audio_data, sample_rate)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(reverb_audio, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(reverb_audio)
Selected 1 impulse responses from ../../datasets/EchoThiefImpulseResponseLibrary/Underground with RT60 in range [0.1, 0.5]
Out[35]:
Add background noise from ESC50 dataset (0dB)¶
In [4]:
# esc_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the ESC50 noise source and mixing level — TODO confirm.
# NOTE(review): the cell prints which corruption file was used and that file is not the same in
# every section, so the choice looks random; no seed is set in this notebook.
esc_augmented = background_noise(label, audio_data, sample_rate, esc_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(esc_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(esc_augmented)
Corruption file: 1-47714-A-16.wav
Out[4]:
Add background noise from MUSAN (10dB)¶
In [45]:
# musan_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the MUSAN noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
musan_augmented = background_noise(label, audio_data, sample_rate, musan_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(musan_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(musan_augmented)
Corruption file: noise-free-sound-0323.wav
Out[45]:
Add background noise from urbansound8k (20dB)¶
In [46]:
# urban_config is not defined in this notebook; it presumably arrives via the star import of
# examples_helper and selects the UrbanSound8K noise source and mixing level — TODO confirm.
# NOTE(review): the printed corruption file differs between sections, so the choice looks
# random; no seed is set in this notebook.
urban_augmented = background_noise(label, audio_data, sample_rate, urban_config)
# The result is used like a file path by the two calls below — presumably a path; TODO confirm.
get_spectrogram(urban_augmented, label)
# Last expression of the cell, so the audio object is shown as the cell output.
ipd.Audio(urban_augmented)
Corruption file: 115415-9-0-7.wav
Out[46]:
In [ ]: